int violation = 0, i, j;
struct chwall_ssid *chwall_ssid;
ssidref_t chwall_ssidref;
- struct domain **pd;
+ struct domain *d;
- write_lock(&domlist_lock);
+ spin_lock(&domlist_update_lock);
/* go through all domains and adjust policy as if this domain was started now */
- pd = &domain_list;
- for (pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list)
+ for_each_domain ( d )
{
chwall_ssid =
GET_SSIDP(ACM_CHINESE_WALL_POLICY,
- (struct acm_ssid_domain *) (*pd)->ssid);
+ (struct acm_ssid_domain *)d->ssid);
chwall_ssidref = chwall_ssid->chwall_ssidref;
traceprintk("%s: validating policy for domain %x (chwall-REF=%x).\n",
- __func__, (*pd)->domain_id, chwall_ssidref);
+ __func__, d->domain_id, chwall_ssidref);
/* a) adjust types ref-count for running domains */
for (i = 0; i < chwall_buf->chwall_max_types; i++)
running_types[i] +=
}
}
out:
- write_unlock(&domlist_lock);
+ spin_unlock(&domlist_update_lock);
return violation;
/* returning "violation != 0" means that the currently running set of domains would
* not be possible if the new policy had been enforced before starting them; for chinese
int violation = 1;
struct ste_ssid *ste_ssid, *ste_rssid;
ssidref_t ste_ssidref, ste_rssidref;
- struct domain **pd, *rdom;
+ struct domain *d, *rdom;
domid_t rdomid;
struct grant_entry sha_copy;
int port, i;
- read_lock(&domlist_lock); /* go by domain? or directly by global? event/grant list */
+ rcu_read_lock(&domlist_read_lock);
+ /* go by domain? or directly by global? event/grant list */
/* go through all domains and adjust policy as if this domain was started now */
- pd = &domain_list;
- for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
+ for_each_domain ( d )
+ {
ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY,
- (struct acm_ssid_domain *)(*pd)->ssid);
+ (struct acm_ssid_domain *)d->ssid);
ste_ssidref = ste_ssid->ste_ssidref;
traceprintk("%s: validating policy for eventch domain %x (ste-Ref=%x).\n",
- __func__, (*pd)->domain_id, ste_ssidref);
+ __func__, d->domain_id, ste_ssidref);
/* a) check for event channel conflicts */
for (port=0; port < NR_EVTCHN_BUCKETS; port++) {
- spin_lock(&(*pd)->evtchn_lock);
- if ((*pd)->evtchn[port] == NULL) {
- spin_unlock(&(*pd)->evtchn_lock);
+ spin_lock(&d->evtchn_lock);
+ if (d->evtchn[port] == NULL) {
+ spin_unlock(&d->evtchn_lock);
continue;
}
- if ((*pd)->evtchn[port]->state == ECS_INTERDOMAIN) {
- rdom = (*pd)->evtchn[port]->u.interdomain.remote_dom;
+ if (d->evtchn[port]->state == ECS_INTERDOMAIN) {
+ rdom = d->evtchn[port]->u.interdomain.remote_dom;
rdomid = rdom->domain_id;
/* rdom now has remote domain */
ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY,
(struct acm_ssid_domain *)(rdom->ssid));
ste_rssidref = ste_rssid->ste_ssidref;
- } else if ((*pd)->evtchn[port]->state == ECS_UNBOUND) {
- rdomid = (*pd)->evtchn[port]->u.unbound.remote_domid;
+ } else if (d->evtchn[port]->state == ECS_UNBOUND) {
+ rdomid = d->evtchn[port]->u.unbound.remote_domid;
if ((rdom = get_domain_by_id(rdomid)) == NULL) {
printk("%s: Error finding domain to id %x!\n", __func__, rdomid);
goto out;
ste_rssidref = ste_rssid->ste_ssidref;
put_domain(rdom);
} else {
- spin_unlock(&(*pd)->evtchn_lock);
+ spin_unlock(&d->evtchn_lock);
continue; /* port unused */
}
- spin_unlock(&(*pd)->evtchn_lock);
+ spin_unlock(&d->evtchn_lock);
/* rdom now has remote domain */
ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY,
(struct acm_ssid_domain *)(rdom->ssid));
ste_rssidref = ste_rssid->ste_ssidref;
traceprintk("%s: eventch: domain %x (ssidref %x) --> domain %x (rssidref %x) used (port %x).\n",
- __func__, (*pd)->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port);
+ __func__, d->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port);
/* check whether on subj->ssid, obj->ssid share a common type*/
if (!have_common_type(ste_ssidref, ste_rssidref)) {
printkd("%s: Policy violation in event channel domain %x -> domain %x.\n",
- __func__, (*pd)->domain_id, rdomid);
+ __func__, d->domain_id, rdomid);
goto out;
}
}
/* b) check for grant table conflicts on shared pages */
- spin_lock(&(*pd)->grant_table->lock);
- for ( i = 0; i < nr_grant_entries((*pd)->grant_table); i++ ) {
+ spin_lock(&d->grant_table->lock);
+ for ( i = 0; i < nr_grant_entries(d->grant_table); i++ ) {
#define SPP (PAGE_SIZE / sizeof(struct grant_entry))
- sha_copy = (*pd)->grant_table->shared[i/SPP][i%SPP];
+ sha_copy = d->grant_table->shared[i/SPP][i%SPP];
if ( sha_copy.flags ) {
printkd("%s: grant dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) frame:(%lx)\n",
- __func__, (*pd)->domain_id, i, sha_copy.flags, sha_copy.domid,
+ __func__, d->domain_id, i, sha_copy.flags, sha_copy.domid,
(unsigned long)sha_copy.frame);
rdomid = sha_copy.domid;
if ((rdom = get_domain_by_id(rdomid)) == NULL) {
- spin_unlock(&(*pd)->grant_table->lock);
+ spin_unlock(&d->grant_table->lock);
printkd("%s: domain not found ERROR!\n", __func__);
goto out;
};
ste_rssidref = ste_rssid->ste_ssidref;
put_domain(rdom);
if (!have_common_type(ste_ssidref, ste_rssidref)) {
- spin_unlock(&(*pd)->grant_table->lock);
+ spin_unlock(&d->grant_table->lock);
printkd("%s: Policy violation in grant table sharing domain %x -> domain %x.\n",
- __func__, (*pd)->domain_id, rdomid);
+ __func__, d->domain_id, rdomid);
goto out;
}
}
}
- spin_unlock(&(*pd)->grant_table->lock);
+ spin_unlock(&d->grant_table->lock);
}
violation = 0;
out:
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
return violation;
/* returning "violation != 0" means that existing sharing between domains would not
* have been allowed if the new policy had been enforced before the sharing; for ste,
struct acm_ste_policy_buffer *ste_buf = (struct acm_ste_policy_buffer *)buf;
void *ssidrefsbuf;
struct ste_ssid *ste_ssid;
- struct domain **pd;
+ struct domain *d;
int i;
if (buf_size < sizeof(struct acm_ste_policy_buffer))
ste_bin_pol.ssidrefs = (domaintype_t *)ssidrefsbuf;
/* clear all ste caches */
- read_lock(&domlist_lock);
- pd = &domain_list;
- for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
+ rcu_read_lock(&domlist_read_lock);
+ for_each_domain ( d ) {
ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY,
- (struct acm_ssid_domain *)(*pd)->ssid);
+ (struct acm_ssid_domain *)(d)->ssid);
for (i=0; i<ACM_TE_CACHE_SIZE; i++)
ste_ssid->ste_cache[i].valid = ACM_STE_free;
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
return ACM_OK;
error_free:
{
struct ste_ssid *ste_ssid;
int i;
- struct domain **pd;
+ struct domain *d;
struct acm_ssid_domain *ssid;
printkd("deleting cache for dom %x.\n", id);
- read_lock(&domlist_lock); /* look through caches of all domains */
- pd = &domain_list;
- for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
- ssid = (struct acm_ssid_domain *)((*pd)->ssid);
+ rcu_read_lock(&domlist_read_lock);
+ /* look through caches of all domains */
+ for_each_domain ( d ) {
+ ssid = (struct acm_ssid_domain *)(d->ssid);
if (ssid == NULL)
continue; /* hanging domain structure, no ssid any more ... */
ste_ssid->ste_cache[i].valid = ACM_STE_free;
}
out:
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
}
/***************************
/* this route is for dump routine */
unw_init_running(try_crashdump, pt);
} else {
+ rcu_read_lock(&domlist_read_lock);
for_each_domain(d) {
for_each_vcpu(d, v) {
printk("Backtrace of current vcpu "
show_stack(v, NULL);
}
}
+ rcu_read_unlock(&domlist_read_lock);
}
}
unw_init_running(freeze_cpu_osinit, NULL);
/*
 * note: some functions mask interrupts while holding this lock,
 * so this lock must never be acquired from an interrupt handler.
- * lock order domlist_lock => xenpfm_context_lock
*/
DEFINE_SPINLOCK(xenpfm_context_lock);
arg.error[cpu] = 0;
BUG_ON(in_irq());
- read_lock(&domlist_lock);
spin_lock(&xenpfm_context_lock);
error = xenpfm_start_stop_locked(0);
- read_unlock(&domlist_lock);
if (error) {
spin_unlock(&xenpfm_context_lock);
return error;
while (atomic_read(&arg.started) != cpus)
cpu_relax();
- for_each_domain(d) {
+ rcu_read_lock(&domlist_read_lock);
+ for_each_domain(d)
for_each_vcpu(d, v)
xenpfm_start_stop_vcpu(v, is_start);
- }
+ rcu_read_unlock(&domlist_read_lock);
arg.error[smp_processor_id()] = __xenpfm_start_stop(is_start);
atomic_inc(&arg.finished);
int error;
BUG_ON(in_irq());
- read_lock(&domlist_lock);
spin_lock(&xenpfm_context_lock);
- error =xenpfm_start_stop_locked(is_start);
+ error = xenpfm_start_stop_locked(is_start);
spin_unlock(&xenpfm_context_lock);
- read_unlock(&domlist_lock);
return error;
}
+/* Audit every domain in the system. The RCU domlist read lock keeps
+ * the domain list from being unlinked while we walk it. */
void audit_domains(void)
{
    struct domain *d;
+    rcu_read_lock(&domlist_read_lock);
    for_each_domain ( d )
        audit_domain(d);
+    rcu_read_unlock(&domlist_read_lock);
}
void audit_domains_key(unsigned char key)
struct vcpu *v;
printk("*********** VMCB Areas **************\n");
+
+ rcu_read_lock(&domlist_read_lock);
+
for_each_domain ( d )
{
if ( !is_hvm_domain(d) )
}
}
+ rcu_read_unlock(&domlist_read_lock);
+
printk("**************************************\n");
}
struct vcpu *v;
printk("*********** VMCS Areas **************\n");
+
+ rcu_read_lock(&domlist_read_lock);
+
for_each_domain ( d )
{
if ( !is_hvm_domain(d) )
}
}
+ rcu_read_unlock(&domlist_read_lock);
+
printk("**************************************\n");
}
{
struct domain *d;
printk("'%c' pressed -> blowing all shadow tables\n", c);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain(d)
+ {
if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL )
{
shadow_lock(d);
shadow_blow_tables(d);
shadow_unlock(d);
}
+ }
+ rcu_read_unlock(&domlist_read_lock);
}
/* Register this function in the Xen console keypress table */
wc_nsec = _wc_nsec = (u32)y;
spin_unlock(&wc_lock);
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
update_domain_wallclock_time(d);
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
}
static void local_time_calibration(void *unused)
#include <xen/shutdown.h>
#include <xen/percpu.h>
#include <xen/multicall.h>
+#include <xen/rcupdate.h>
#include <asm/debugger.h>
#include <public/sched.h>
#include <public/vcpu.h>
-/* Both these structures are protected by the domlist_lock. */
-DEFINE_RWLOCK(domlist_lock);
-struct domain *domain_hash[DOMAIN_HASH_SIZE];
+/* Protect updates/reads (resp.) of domain_list and domain_hash. */
+DEFINE_SPINLOCK(domlist_update_lock);
+DEFINE_RCU_READ_LOCK(domlist_read_lock);
+
+#define DOMAIN_HASH_SIZE 256
+#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
+static struct domain *domain_hash[DOMAIN_HASH_SIZE];
struct domain *domain_list;
struct domain *dom0;
if ( !is_idle_domain(d) )
{
- write_lock(&domlist_lock);
+ spin_lock(&domlist_update_lock);
pd = &domain_list; /* NB. domain_list maintained in order of domid. */
for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
if ( (*pd)->domain_id > d->domain_id )
break;
d->next_in_list = *pd;
- *pd = d;
d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
- domain_hash[DOMAIN_HASH(domid)] = d;
- write_unlock(&domlist_lock);
+    /* The two RCU assignments are not atomic, so readers may see an
+     * inconsistent domain list and hash table. That is OK as long as
+     * each RCU reader-side critical section uses only one of them. */
+ rcu_assign_pointer(*pd, d);
+ rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
+ spin_unlock(&domlist_update_lock);
}
return d;
{
struct domain *d;
- read_lock(&domlist_lock);
- d = domain_hash[DOMAIN_HASH(dom)];
+ rcu_read_lock(&domlist_read_lock);
+ d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
while ( d != NULL )
{
if ( d->domain_id == dom )
d = NULL;
break;
}
- d = d->next_in_hashbucket;
+ d = rcu_dereference(d->next_in_hashbucket);
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
return d;
}
send_guest_global_virq(dom0, VIRQ_DEBUGGER);
}
+/* Deferred tail of domain destruction, invoked as an RCU callback once
+ * no RCU reader can still reach @d through domain_list or domain_hash:
+ * tear down the domain's remaining resources, free the structure, and
+ * notify dom0 (VIRQ_DOM_EXC) that a domain has gone away. */
+static void complete_domain_destroy(struct rcu_head *head)
+{
+    struct domain *d = container_of(head, struct domain, rcu);
+
+    rangeset_domain_destroy(d);
+
+    evtchn_destroy(d);
+    grant_table_destroy(d);
+
+    arch_domain_destroy(d);
+
+    free_domain(d);
+
+    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+}
/* Release resources belonging to task @p. */
void domain_destroy(struct domain *d)
return;
/* Delete from task list and task hashtable. */
- write_lock(&domlist_lock);
+ spin_lock(&domlist_update_lock);
pd = &domain_list;
while ( *pd != d )
pd = &(*pd)->next_in_list;
- *pd = d->next_in_list;
+ rcu_assign_pointer(*pd, d->next_in_list);
pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
while ( *pd != d )
pd = &(*pd)->next_in_hashbucket;
- *pd = d->next_in_hashbucket;
- write_unlock(&domlist_lock);
-
- rangeset_domain_destroy(d);
-
- evtchn_destroy(d);
- grant_table_destroy(d);
-
- arch_domain_destroy(d);
+ rcu_assign_pointer(*pd, d->next_in_hashbucket);
+ spin_unlock(&domlist_update_lock);
- free_domain(d);
-
- send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+ /* schedule RCU asynchronous completion of domain destroy */
+ call_rcu(&d->rcu, complete_domain_destroy);
}
static void vcpu_pause_setup(struct vcpu *v)
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
+#include <xen/rcupdate.h>
#include <xen/guest_access.h>
#include <xen/bitmap.h>
#include <asm/current.h>
cpumask_t cpu_exclude_map;
/* Do an initial CPU placement. Pick the least-populated CPU. */
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
for_each_vcpu ( d, v )
if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
cnt[v->processor]++;
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
/*
* If we're on a HT system, we only auto-allocate to a non-primary HT. We
if ( dom == DOMID_SELF )
dom = current->domain->domain_id;
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
{
if ( (d == NULL) || !get_domain(d) )
{
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
ret = -ESRCH;
break;
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
getdomaininfo(d, &op->u.getdomaininfo);
printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key,
(u32)(now>>32), (u32)now);
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
{
}
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
}
static cpumask_t read_clocks_cpumask = CPU_MASK_NONE;
loop = 0;
printk("\nnot on Q\n");
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
{
for_each_vcpu(d, ed)
}
}
}
+ rcu_read_unlock(&domlist_read_lock);
}
struct domain *d;
int sumw[NR_CPUS] = { 0 };
s_time_t sumt[NR_CPUS] = { 0 };
-
+
/* Sum across all weights. */
+ rcu_read_lock(&domlist_read_lock);
for_each_domain( d )
{
for_each_vcpu( d, p )
}
}
}
+ rcu_read_unlock(&domlist_read_lock);
/* Adjust all slices (and periods) to the new weight. */
+ rcu_read_lock(&domlist_read_lock);
for_each_domain( d )
{
for_each_vcpu ( d, p )
}
}
}
+ rcu_read_unlock(&domlist_read_lock);
return 0;
}
struct xen_domctl_getdomaininfo info;
u32 num_domains = 0;
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
{
num_domains++;
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
if ( ret != 0 )
break;
int rcu_pending(int cpu);
int rcu_needs_cpu(int cpu);
+/*
+ * Dummy lock type for passing to rcu_read_{lock,unlock}. Currently exists
+ * only to document the reason for rcu_read_lock() critical sections.
+ */
+struct _rcu_read_lock {};
+typedef struct _rcu_read_lock rcu_read_lock_t;
+#define DEFINE_RCU_READ_LOCK(x) rcu_read_lock_t x
+
+/**
+ * rcu_read_lock - mark the beginning of an RCU read-side critical section.
+ *
+ * When call_rcu() is invoked on one CPU while other CPUs are within
+ * RCU read-side critical sections, invocation of the corresponding RCU
+ * callback is deferred until after all the other CPUs exit their
+ * critical sections.
+ *
+ * Note, however, that RCU callbacks are permitted to run concurrently
+ * with RCU read-side critical sections. One way that this can happen
+ * is via the following sequence of events: (1) CPU 0 enters an RCU
+ * read-side critical section, (2) CPU 1 invokes call_rcu() to register
+ * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
+ * (4) CPU 2 enters a RCU read-side critical section, (5) the RCU
+ * callback is invoked. This is legal, because the RCU read-side critical
+ * section that was running concurrently with the call_rcu() (and which
+ * therefore might be referencing something that the corresponding RCU
+ * callback would free up) has completed before the corresponding
+ * RCU callback is invoked.
+ *
+ * RCU read-side critical sections may be nested. Any deferred actions
+ * will be deferred until the outermost RCU read-side critical section
+ * completes.
+ *
+ * It is illegal to block while in an RCU read-side critical section.
+ */
+#define rcu_read_lock(x) do { } while (0)
+
+/**
+ * rcu_read_unlock - marks the end of an RCU read-side critical section.
+ *
+ * See rcu_read_lock() for more information.
+ */
+#define rcu_read_unlock(x) do { } while (0)
+
+/*
+ * So where is rcu_write_lock()? It does not exist, as there is no
+ * way for writers to lock out RCU readers. This is a feature, not
+ * a bug -- this property is what provides RCU's performance benefits.
+ * Of course, writers must coordinate with each other. The normal
+ * spinlock primitives work well for this, but any other technique may be
+ * used as well. RCU does not care how the writers keep out of each
+ * others' way, as long as they do so.
+ */
+
/**
* rcu_dereference - fetch an RCU-protected pointer in an
* RCU read-side critical section. This pointer may later
#include <xen/rangeset.h>
#include <asm/domain.h>
#include <xen/xenoprof.h>
+#include <xen/rcupdate.h>
#include <xen/irq.h>
#ifdef CONFIG_COMPAT
#endif
extern unsigned long volatile jiffies;
-extern rwlock_t domlist_lock;
/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;
/* OProfile support. */
struct xenoprof *xenoprof;
int32_t time_offset_seconds;
+
+ struct rcu_head rcu;
};
struct domain_setup_info
local_events_need_delivery() \
))
-/* This domain_hash and domain_list are protected by the domlist_lock. */
-#define DOMAIN_HASH_SIZE 256
-#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
-extern struct domain *domain_hash[DOMAIN_HASH_SIZE];
+/* Protect updates/reads (resp.) of domain_list and domain_hash. */
+extern spinlock_t domlist_update_lock;
+extern rcu_read_lock_t domlist_read_lock;
+
extern struct domain *domain_list;
+/* Caller must hold the domlist_read_lock or domlist_update_lock. */
#define for_each_domain(_d) \
- for ( (_d) = domain_list; \
+ for ( (_d) = rcu_dereference(domain_list); \
(_d) != NULL; \
- (_d) = (_d)->next_in_list )
+ (_d) = rcu_dereference((_d)->next_in_list) )
#define for_each_vcpu(_d,_v) \
for ( (_v) = (_d)->vcpu[0]; \